home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Pascal Super Library
/
Pascal Super Library (CW International)(1997).bin
/
COMPILER
/
LYSRC
/
LEX.PAS
next >
Wrap
Pascal/Delphi Source File
|
1993-01-23
|
19KB
|
686 lines
program Lex;
uses LexBase, LexTables, LexPos, LexDFA, LexOpt, LexList, LexRules, LexMsgs;
(* Copyright (c) 1990,91 by Albert Graef, Schillerstr. 18,
6509 Schornsheim/Germany
All rights reserved *)
(* TP Lex - A lexical analyzer generator for MS-DOS/Turbo Pascal
Version 3.0 as of April 91
Author
Albert Graef
Schillerstr. 18
6509 Schornsheim/Germany
Graef@DMZRZU71.bitnet
Synopsis LEX [options] lex-file[.L] [output-file[.PAS]]
Options
/v "Verbose:" Lex generates a readable description of the generated
lexical analyzer, written to lex-file with new extension .LST.
/o "Optimize:" Lex optimizes DFA tables to produce a minimal DFA
Description
This is a reimplementation of the popular UNIX lexical analyzer generator
Lex for MS-DOS and Turbo Pascal.
Differences from UNIX Lex:
- Produces output code for Turbo Pascal, rather than for C.
- Character tables (%T) are not supported; neither are any directives
to determine internal table sizes (%p, %n, etc.).
*)
procedure get_line;
(* obtain line from source file *)
begin
readln(yyin, line);
inc(lno);
end(*get_line*);
procedure next_section;
(* find next section mark (%%) in code template *)
var line : String;
begin
while not eof(yycod) do
begin
readln(yycod, line);
if line='%%' then exit;
writeln(yyout, line);
end;
end(*next_section*);
(* Semantic routines: *)
var n_rules : Integer; (* current number of rules *)
procedure define_start_state ( symbol : String; pos : Integer );
(* process start state definition *)
begin
with sym_table^[key(symbol, max_keys, lookup, entry)] do
if sym_type=none then
begin
inc(n_start_states);
if n_start_states>max_start_states then
fatal(state_table_overflow);
sym_type := start_state_sym;
start_state := n_start_states;
writeln(yyout, 'const ', symbol, ' = ', 2*start_state, ';');
first_pos_table^[2*start_state] := newIntSet;
first_pos_table^[2*start_state+1] := newIntSet;
end
else
error(symbol_already_defined, pos)
end(*define_start_state*);
procedure define_macro ( symbol, replacement : String );
(* process macro definition *)
begin
with sym_table^[key('{'+symbol+'}', max_keys, lookup, entry)] do
if sym_type=none then
begin
sym_type := macro_sym;
subst := newStr(replacement);
end
else
error(symbol_already_defined, 1)
end(*define_macro*);
procedure add_rule;
(* process rule *)
var i : Integer;
FIRST : IntSet;
begin
addExpr(r, FIRST);
if n_st=0 then
if cf then
setunion(first_pos_table^[1]^, FIRST)
else
begin
setunion(first_pos_table^[0]^, FIRST);
setunion(first_pos_table^[1]^, FIRST);
end
else
if cf then
for i := 1 to n_st do
setunion(first_pos_table^[2*st[i]+1]^, FIRST)
else
for i := 1 to n_st do
begin
setunion(first_pos_table^[2*st[i]]^, FIRST);
setunion(first_pos_table^[2*st[i]+1]^, FIRST);
end
end(*add_rule*);
procedure generate_table;
(* write the DFA table to the output file
Tables are represented as a collection of typed array constants:
type YYTRec = record
cc : set of Char; { characters }
s : Integer; { next state }
end;
const
{ table sizes: }
yynmarks = ...;
yynmatches = ...;
yyntrans = ...;
yynstates = ...;
{ rules of mark positions for each state: }
yyk : array [1..yynmarks] of Integer = ...;
{ rules of matches for each state: }
yym : array [1..yynmatches] of Integer = ...;
{ transition table: }
yyt : array [1..yyntrans] of YYTRec = ...;
{ offsets into the marks, matches and transition tables: }
yykl, yykh,
yyml, yymh,
yytl, yyth : array [0..yynstates-1] of Integer = ...;
*)
var yynmarks, yynmatches, yyntrans, yynstates : Integer;
yykl, yykh,
yyml, yymh,
yytl, yyth : array [0..max_states-1] of Integer;
procedure counters;
(* compute counters and offsets *)
var s, i : Integer;
begin
yynstates := n_states; yyntrans := n_trans;
yynmarks := 0; yynmatches := 0;
for s := 0 to n_states-1 do with state_table^[s] do
begin
yytl[s] := trans_lo; yyth[s] := trans_hi;
yykl[s] := yynmarks+1; yyml[s] := yynmatches+1;
for i := 1 to size(state_pos^) do
with pos_table^[state_pos^[i]] do
if pos_type=mark_pos then
if pos=0 then
inc(yynmatches)
else if pos=1 then
inc(yynmarks);
yykh[s] := yynmarks; yymh[s] := yynmatches;
end;
end(*counters*);
procedure writecc(var f : Text; cc : CClass);
(* print the given character class *)
function charStr(c : Char) : String;
begin
case c of
#0..#31, (* nonprintable characters *)
#127..#255 : charStr := '#'+intStr(ord(c));
'''' : charStr := '''''''''';
else charStr := ''''+c+'''';
end;
end(*charStr*);
var c1, c2 : Char;
col : Integer;
tag : String;
begin
write(f, '[ ');
col := 0;
for c1:=#0 to #255 do
if c1 in cc then
begin
if col>0 then
begin
write(f, ',');
inc(col);
end;
if col>40 then
(* insert line break *)
begin
writeln(f);
write(f, ' ':12);
col := 0;
end;
c2 := c1;
while (c2<#255) and (succ(c2) in cc) do
inc(c2);
if c1=c2 then
tag := charStr(c1)
else if c2=succ(c1) then
tag := charStr(c1)+','+charStr(c2)
else
tag := charStr(c1)+'..'+charStr(c2);
write(f, tag);
inc(col, length(tag));
c1 := c2
end;
write(f, ' ]');
end(*writecc*);
procedure tables;
(* print tables *)
var s, i, count : Integer;
begin
writeln(yyout);
writeln(yyout, 'type YYTRec = record');
writeln(yyout, ' cc : set of Char;');
writeln(yyout, ' s : Integer;');
writeln(yyout, ' end;');
writeln(yyout);
writeln(yyout, 'const');
(* table sizes: *)
writeln(yyout);
writeln(yyout, 'yynmarks = ', yynmarks, ';');
writeln(yyout, 'yynmatches = ', yynmatches, ';');
writeln(yyout, 'yyntrans = ', yyntrans, ';');
writeln(yyout, 'yynstates = ', yynstates, ';');
(* mark table: *)
writeln(yyout);
writeln(yyout, 'yyk : array [1..yynmarks] of Integer = (');
count := 0;
for s := 0 to n_states-1 do with state_table^[s] do
begin
writeln(yyout, ' { ', s, ': }');
for i := 1 to size(state_pos^) do
with pos_table^[state_pos^[i]] do
if (pos_type=mark_pos) and (pos=1) then
begin
write(yyout, ' ', rule); inc(count);
if count<yynmarks then write(yyout, ',');
writeln(yyout);
end;
end;
writeln(yyout, ');');
(* match table: *)
writeln(yyout);
writeln(yyout, 'yym : array [1..yynmatches] of Integer = (');
count := 0;
for s := 0 to n_states-1 do with state_table^[s] do
begin
writeln(yyout, '{ ', s, ': }');
for i := 1 to size(state_pos^) do
with pos_table^[state_pos^[i]] do
if (pos_type=mark_pos) and (pos=0) then
begin
write(yyout, ' ', rule); inc(count);
if count<yynmatches then write(yyout, ',');
writeln(yyout);
end;
end;
writeln(yyout, ');');
(* transition table: *)
writeln(yyout);
writeln(yyout, 'yyt : array [1..yyntrans] of YYTrec = (');
count := 0;
for s := 0 to n_states-1 do with state_table^[s] do
begin
writeln(yyout, '{ ', s, ': }');
for i := trans_lo to trans_hi do
with trans_table^[i] do
begin
write(yyout, ' ( cc: ');
writecc(yyout, cc^);
write(yyout, '; s: ');
write(yyout, next_state, ')');
inc(count);
if count<yyntrans then write(yyout, ',');
writeln(yyout);
end;
end;
writeln(yyout, ');');
(* offset tables: *)
writeln(yyout);
writeln(yyout, 'yykl : array [0..yynstates-1] of Integer = (');
for s := 0 to n_states-1 do
begin
write(yyout, '{ ', s, ': } ', yykl[s]);
if s<n_states-1 then write(yyout, ',');
writeln(yyout);
end;
writeln(yyout, ');');
writeln(yyout);
writeln(yyout, 'yykh : array [0..yynstates-1] of Integer = (');
for s := 0 to n_states-1 do
begin
write(yyout, '{ ', s, ': } ', yykh[s]);
if s<n_states-1 then write(yyout, ',');
writeln(yyout);
end;
writeln(yyout, ');');
writeln(yyout);
writeln(yyout, 'yyml : array [0..yynstates-1] of Integer = (');
for s := 0 to n_states-1 do
begin
write(yyout, '{ ', s, ': } ', yyml[s]);
if s<n_states-1 then write(yyout, ',');
writeln(yyout);
end;
writeln(yyout, ');');
writeln(yyout);
writeln(yyout, 'yymh : array [0..yynstates-1] of Integer = (');
for s := 0 to n_states-1 do
begin
write(yyout, '{ ', s, ': } ', yymh[s]);
if s<n_states-1 then write(yyout, ',');
writeln(yyout);
end;
writeln(yyout, ');');
writeln(yyout);
writeln(yyout, 'yytl : array [0..yynstates-1] of Integer = (');
for s := 0 to n_states-1 do
begin
write(yyout, '{ ', s, ': } ', yytl[s]);
if s<n_states-1 then write(yyout, ',');
writeln(yyout);
end;
writeln(yyout, ');');
writeln(yyout);
writeln(yyout, 'yyth : array [0..yynstates-1] of Integer = (');
for s := 0 to n_states-1 do
begin
write(yyout, '{ ', s, ': } ', yyth[s]);
if s<n_states-1 then write(yyout, ',');
writeln(yyout);
end;
writeln(yyout, ');');
writeln(yyout);
end(*tables*);
begin
counters; tables;
end(*generate_table*);
(* Parser: *)
const
max_items = 255;
var
itemstr : String;
itemc : Integer;
itempos,
itemlen : array [1..max_items] of Integer;
procedure split ( str : String; count : Integer );
(* split str into at most count whitespace-delimited items
(result in itemstr, itemc, itempos, itemlen) *)
procedure scan(var act_pos : Integer);
(* scan one item *)
var l : Integer;
begin
while (act_pos<=length(itemstr)) and
((itemstr[act_pos]=' ') or (itemstr[act_pos]=tab)) do
inc(act_pos);
l := 0;
while (act_pos+l<=length(itemstr)) and
(itemstr[act_pos+l]<>' ') and (itemstr[act_pos+l]<>tab) do
inc(l);
inc(itemc);
itempos[itemc] := act_pos;
itemlen[itemc] := l;
inc(act_pos, l+1);
while (act_pos<=length(itemstr)) and
((itemstr[act_pos]=' ') or (itemstr[act_pos]=tab)) do
inc(act_pos);
end(*scan*);
var i, act_pos : Integer;
begin
itemstr := str; act_pos := 1;
itemc := 0;
while (itemc<count-1) and (act_pos<=length(itemstr)) do scan(act_pos);
if act_pos<=length(itemstr) then
begin
inc(itemc);
itempos[itemc] := act_pos;
itemlen[itemc] := length(itemstr)-act_pos+1;
end;
end(*split*);
function itemv ( i : Integer ) : String;
(* return ith item in splitted string (whole string for i=0) *)
begin
if i=0 then
itemv := itemstr
else if (i<0) or (i>itemc) then
itemv := ''
else
itemv := copy(itemstr, itempos[i], itemlen[i])
end(*itemv*);
procedure code;
begin
while not eof(yyin) do
begin
get_line;
if line='%}' then
exit
else
writeln(yyout, line);
end;
error(unmatched_lbrace, length(line)+1);
end(*code*);
procedure definitions;
procedure definition;
function check_id ( symbol : String ) : Boolean;
var i : Integer;
begin
if (symbol='') or not (symbol[1] in letters) then
check_id := false
else
begin
for i := 2 to length(symbol) do
if not (symbol[i] in alphanums) then
begin
check_id := false;
exit;
end;
check_id := true
end
end(*check_id*);
var i : Integer;
com : String;
begin
split(line, 2);
com := upper(itemv(1));
if (com='%S') or (com='%START') then
begin
split(line, max_items);
for i := 2 to itemc do
if check_id(itemv(i)) then
define_start_state(itemv(i), itempos[i])
else
error(syntax_error, itempos[i]);
end
else if check_id(itemv(1)) then
define_macro(itemv(1), itemv(2))
else
error(syntax_error, 1);
end(*definition*);
begin
while not eof(yyin) do
begin
get_line;
if line='' then
writeln(yyout)
else if line='%%' then
exit
else if line='%{' then
code
else if (line[1]='%') or (line[1] in letters) then
definition
else
writeln(yyout, line)
end;
end(*definitions*);
procedure rules;
begin
next_section;
if line='%%' then
while not eof(yyin) do
begin
get_line;
if line='' then
writeln(yyout)
else if line='%%' then
begin
next_section;
exit;
end
else if line='%{' then
code
else if (line[1]<>' ') and (line[1]<>tab) then
begin
if n_rules=0 then next_section;
inc(n_rules);
parse_rule(n_rules);
if errors=0 then
begin
add_rule;
write(yyout, ' ', n_rules);
if strip(stmt)='|' then
writeln(yyout, ',')
else
begin
writeln(yyout, ':');
writeln(yyout, blankStr(expr), stmt);
end;
end
end
else
writeln(yyout, line)
end
else
error(unexpected_eof, length(line)+1);
next_section;
end(*rules*);
procedure auxiliary_procs;
begin
if line='%%' then
begin
writeln(yyout);
while not eof(yyin) do
begin
get_line;
writeln(yyout, line);
end;
end;
end(*auxiliary_procs*);
(* Main program: *)
var i : Integer;
begin
(* sign-on: *)
writeln(sign_on);
(* parse command line: *)
if paramCount=0 then
begin
writeln(usage);
writeln(options);
halt(0);
end;
lfilename := '';
pasfilename := '';
for i := 1 to paramCount do
if copy(paramStr(i), 1, 1)='/' then
if upper(paramStr(i))='/V' then
verbose := true
else if upper(paramStr(i))='/O' then
optimize := true
else
begin
writeln(invalid_option, paramStr(i));
halt(1);
end
else if lfilename='' then
lfilename := addExt(upper(paramStr(i)), 'L')
else if pasfilename='' then
pasfilename := addExt(upper(paramStr(i)), 'PAS')
else
begin
writeln(illegal_no_args);
halt(1);
end;
if lfilename='' then
begin
writeln(illegal_no_args);
halt(1);
end;
if pasfilename='' then pasfilename := root(lfilename)+'.PAS';
lstfilename := root(lfilename)+'.LST';
(* open files: *)
assign(yyin, lfilename);
assign(yyout, pasfilename);
assign(yylst, lstfilename);
reset(yyin); if ioresult<>0 then fatal(cannot_open_file+lfilename);
rewrite(yyout); if ioresult<>0 then fatal(cannot_open_file+pasfilename);
rewrite(yylst); if ioresult<>0 then fatal(cannot_open_file+lstfilename);
(* search code template in current directory, then on path where Lex
was executed from: *)
codfilename := 'YYLEX.COD';
assign(yycod, codfilename);
reset(yycod);
if ioresult<>0 then
begin
codfilename := upper(path(paramStr(0)))+'YYLEX.COD';
assign(yycod, codfilename);
reset(yycod);
if ioresult<>0 then fatal(cannot_open_file+codfilename);
end;
(* parse source grammar: *)
write('parse ... ');
lno := 0; n_rules := 0; next_section;
first_pos_table^[0] := newIntSet;
first_pos_table^[1] := newIntSet;
definitions;
rules;
if n_rules=0 then error(empty_grammar, length(line)+1);
if errors=0 then
begin
(* generate DFA table and listings and write output code: *)
write('DFA construction ... ');
makeDFATable;
if optimize then
begin
write('DFA optimization ... ');
optimizeDFATable;
end;
write('code generation ... ');
if verbose then listDFATable;
generate_table; next_section;
end;
auxiliary_procs;
if errors=0 then writeln('DONE');
(* close files: *)
close(yyin); close(yyout); close(yylst); close(yycod);
(* print statistics: *)
if errors>0 then
writeln( lno, ' lines, ',
errors, ' errors found.' )
else
writeln( lno, ' lines, ',
n_rules, ' rules, ',
n_pos, '/', max_pos, ' p, ',
n_states, '/', max_states, ' s, ',
n_trans, '/', max_trans, ' t.');
if warnings>0 then writeln(warnings, ' warnings.');
writeln( n_bytes, '/', max_bytes, ' bytes of memory used.');
(* terminate: *)
if errors>0 then erase(yyout);
if file_size(lstfilename)=0 then
erase(yylst)
else
writeln('(see ', lstfilename, ' for more information)');
halt(errors);
end(*Lex*).